3  Initial Exploration

We don’t even really know the best way to even start looking at these relationships, so this portion will contain some exploratory visualizations.

So to start, I’m just going to pull out a few sites where I would expect to see some influence - sites that aren’t in the bays, but are maybe around the edges. I’ll hand-pick and hard-code, to figure out what this all even looks like, then figure out how to scale up.

We have monthly precipitation data and hydrologic load data.

Code
library(mapview)
library(sf)
library(tidyverse)
library(tmap)
library(tbeptools)
library(khroma)
library(corrplot)

3.1 Data Import

Read in shapefiles and other data files here.

Code
swfwmdpixels <- read_sf(here::here("GIS", "swfwmd_pixel_2_utm_m_83.shp"))
bay_segs <- read_sf(here::here("GIS", "TBEP-Bay_Segments.shp")) |> 
    st_transform(crs = st_crs(swfwmdpixels))

hydro <- readxl::read_xlsx(here::here("hydro-load-data",
                                      "monthly-hydrology.xlsx"))

hydro <- hydro |> 
    rename(hydro_load = hy_load_106_m3_mo) |>
    group_by(bay_segment) |> 
    mutate(lag1_hydro = lag(hydro_load)) |> 
    ungroup()

hydro_sub <- hydro |> 
    dplyr::filter(year >= 2009)

Subset pixel file, merge datasets that need merging, turn into spatial objects.

Code
precip_pixels <- st_intersection(swfwmdpixels, bay_segs)


# pixels to pull rainfall from,
# and their associated bay segments
# (for group-wise averaging)  
pixs <- precip_pixels |> 
    st_drop_geometry() |> 
    select(PIXEL, BAY_SEG)


# read in compiled rainfall (see R/rainfall_preprocessing.R)
prcp <- readRDS(here::here("SWFWMD-rainfall-data", "compiledRainfall.rds"))
prcp2 <- prcp |> 
    filter(PIXEL %in% unique(pixs$PIXEL))
# okaaaay, guess I managed to only pull Tampa Bay data anyway. Good check though.  
rm(prcp2)

daily_prcp <- readRDS(here::here("SWFWMD-rainfall-data", "Daily_Data", "compiledDailyRainfall.rds"))

Which precip to look at? Total for watershed? total within X kilometers of a station? (and what should X be?) Maybe one task is to figure out what’s best. Could vary by watershed too though - Hillsborough Bay’s is way bigger than Middle Tampa Bay’s…. maybe a bigger time lag in situations like that (but still, if we’re only looking at monthly….)

Code
# pull a few months and map both by pixel, 
# and averaged up to bay segment
# make sure it works  

# there are some pixels along bay segment boundaries
# I set this to be a full join so that the precip from
# those pixels goes into each bay segment average
# that they're part of

prcp <- prcp |> 
    full_join(pixs, by = "PIXEL",
              relationship = "many-to-many") |> 
    group_by(PIXEL) |> 
    mutate(prcp_lag1 = lag(inches)) |> 
    ungroup()

daily_prcp <- daily_prcp |> 
    full_join(pixs, by = "PIXEL",
              relationship = "many-to-many") |> 
    group_by(PIXEL) |> 
    mutate(daily_lag1 = lag(inches, 1),
           daily_lag2 = lag(inches, 2),
           daily_lag3 = lag(inches, 3),
           daily_lag4 = lag(inches, 4),
           daily_lag5 = lag(inches, 5),
           daily_lag6 = lag(inches, 6),
           daily_24hrs = inches + daily_lag1,
           daily_48hrs = inches + daily_lag1 + daily_lag2,
           daily_7d = inches + daily_lag1 + daily_lag2 + daily_lag3 + daily_lag4 + daily_lag5 + daily_lag6) |> 
    ungroup() |> 
    select(PIXEL, date, inches, BAY_SEG, 
           daily_24hrs, daily_48hrs, daily_7d)

prcp_bayseg <- prcp |> 
    summarize(.by = c(BAY_SEG, month, year),
              inches = mean(inches, na.rm = TRUE)) |> 
    filter(year == 2022) |> 
    left_join(bay_segs) |> 
    st_as_sf()

prcp_daily_bayseg <- daily_prcp |> 
    summarize(.by = c(BAY_SEG, date),
              across(c(inches, daily_24hrs, daily_48hrs, daily_7d),
                     function(x) mean(x, na.rm = TRUE)))

prcp_pixel <- prcp |> 
    select(-BAY_SEG) |> 
    distinct() |> 
    filter(year == 2022) |> 
    left_join(select(precip_pixels, PIXEL, geometry)) |> 
    st_as_sf()

3.2 Precip maps, comparing pixel-level to bay-segment-level

Code
prcp_bayseg |> 
    filter(month == 1) |> 
    mapview(zcol = "inches",
            layer.name = "Jan 2022")
Code
prcp_pixel |> 
    filter(month == 1) |> 
    mapview(zcol = "inches",
            layer.name = "Jan 2022")
Code
tm_shape(prcp_bayseg) +
    tm_borders() +
    tm_fill("inches",
            n = 6,
            palette = "YlGnBu") +
    tm_facets(by = "month", nrow = 4)

Code
tm_shape(prcp_pixel) +
    tm_fill("inches",
            n = 6,
            palette = "YlGnBu") +
    tm_facets(by = "month", nrow = 4)

Variations throughout a year are so large that my worry about averaging up to bay segment being too rough is…. not really relevant after all. Should be fine.

Code
prcp_bayseg <- prcp |> 
    summarize(.by = c(BAY_SEG, month, year),
              inches = mean(inches, na.rm = TRUE)) |> 
    filter(year == 2022) |> 
    left_join(bay_segs) |> 
    st_as_sf()

tm_shape(prcp_bayseg) +
    tm_borders() +
    tm_fill("inches",
            palette = "YlGnBu",
            n = 10) +
    tm_facets(by = c("month", "year"))

Bay segment level should be fine.

3.3 Precip and hydrologic loads

I’d expect these to correlate pretty well. Wonder if that changes by month and/or wet vs. dry season. Check that out here.

Subset to only the specific bay segments in both datasets (am unsure whether to average precip across different bay segments, which may be different sizes….. probably should do that at the pixel-to-bay-seg level if I want to)

Code
combnd <- prcp |> 
    mutate(bay_segment = case_match(BAY_SEG,
                                    "Boca Ciega Bay" ~ "Remainder Lower Tampa Bay",
                                    "Manatee River" ~ "Remainder Lower Tampa Bay",
                                    "Terra Ceia Bay" ~ "Remainder Lower Tampa Bay",
                                    .default = BAY_SEG)) |> 
    summarize(.by = c(bay_segment, month, year),
              inches = mean(inches, na.rm = TRUE)) |> 
    group_by(bay_segment) |> 
    mutate(lag1_prcp = lag(inches)) |> 
    ungroup() |> 
    filter(year >= 2009,
           bay_segment %in% unique(hydro$bay_segment)) |>
    left_join(hydro, by = c("bay_segment", "year", "month"))

May have log-log relationships, or non-linear relationships, or need to look at it some different way. Let’s see.

Code
p <- ggplot(combnd) +
    geom_point(aes(x = inches, y = hydro_load,
                   col = bay_segment))

p

Generally positive relationship (as you’d expect) but lots of spread, especially at higher precipitation levels.

Looks like a lot of that variability is due to the different bay segments.

Code
p +
    facet_wrap(~bay_segment)

Hillsborough Bay has a lot of variability - not surprising since it’s the biggest of the watersheds. Looks like the relationships hold until about 12 inches, then higher points are both infrequent and possibly show some leveling off. This includes ~15 years of data (2009 and later) and looks almost exactly the same as when I used only 2016 and above.

Quick look at hydrologic load and the previous month’s precip:

Code
ggplot(combnd) +
    geom_point(aes(x = lag1_prcp, y = hydro_load,
                   col = bay_segment)) +
    facet_wrap(~bay_segment) +
    theme(legend.position = "none") +
    labs(x = "previous month's precipitation (inches)",
         y = "hydrologic load")

Looks pretty similar, though with more variation; not entirely unexpected at this aggregation level.

Code
ggplot(combnd) +
    geom_point(aes(x = inches, y = hydro_load,
                   col = factor(month))) +
    scale_color_brewer(palette = "Paired") +
    facet_wrap(~bay_segment)

Makes sense that the relationship would vary by bay segment/watershed size. In Hillsborough Bay, we sort of see Aug-October on the high side of the hydrologic loads, even for the same amount of precipitation as in (e.g.) April.

Code
ggplot(combnd) +
    geom_point(aes(x = inches, y = hydro_load,
                   col = bay_segment)) +
    scale_color_brewer(palette = "Set1") +
    facet_wrap(~month)

Varies a lot by month and bay segment, especially in September.

Code
ggplot(combnd) +
    geom_boxplot(aes(x = factor(month), y = inches,
                    col = bay_segment)) +
    scale_color_brewer(palette = "Set1") +
    labs(title = "Precip variability by month x bay segment",
         x = "Month",
         y = "Monthly Total Precipitation (inches)")

Code
ggplot(combnd) +
    geom_boxplot(aes(x = factor(month), y = hydro_load,
                    col = bay_segment)) +
    scale_color_brewer(palette = "Set1") +
    labs(title = "Hydro load variability by month x bay segment",
         x = "Month",
         y = "Monthly hydrologic load (10^6 m3)")

Precip is similar between watersheds; hydrologic load is highest in Hillsborough Bay and lowest in lower Tampa Bay. Middle and Old Tampa Bay are pretty comparable.

4 Hydrologic Loads and Enterococcus

Code
fib_dat <- readRDS(here::here("fib-data", "tbeptools_importwqp.rds"))

fib_sub <- fib_dat |> 
    filter(yr >= 2009,
           class == "Estuary",
           var == "ecocci")

# get those attached to bay segments  
fib_baysegs <- fib_sub |> 
    st_as_sf(coords = c("Longitude", "Latitude"),
             crs = "WGS84",
             remove = FALSE) |>    # WGS84 is default for handheld GPS units
    st_transform(crs = st_crs(swfwmdpixels)) |> 
    st_intersection(bay_segs) |> 
    mutate(bay_segment = case_match(BAY_SEG,
                                    "Boca Ciega Bay" ~ "Remainder Lower Tampa Bay",
                                    "Manatee River" ~ "Remainder Lower Tampa Bay",
                                    .default = BAY_SEG))

fib_hydro <- left_join(fib_baysegs, combnd,
                       by = c("yr" = "year",
                              "mo" = "month",
                              "bay_segment"))

4.0.1 Attach daily precipitation

Code
fib_daily_prcp <- fib_baysegs |> 
    mutate(date = as.Date(SampleTime)) |> 
    left_join(prcp_daily_bayseg,
              by = c("date", "BAY_SEG"))

4.1 Entero by monthly hydro load

Code
ggplot(fib_hydro, aes(x = hydro_load,
                      y = val)) +
    geom_jitter(aes(col = factor(mo)),
               size = 2,
               alpha = 0.5) +
    geom_smooth() +
    geom_rug(col = "gray40") +
    scale_y_log10() +
    scale_x_log10() +
    facet_wrap(~bay_segment) +
    labs(title = "Entero vs. hydrologic load; log-log scale",
         x = "Hydrologic Load",
         y = "Enterococcus",
         col = "Month")

Interesting….. it’s not necessarily the monthly hydrologic loads that matter (though it’s clear that the higher hydrologic loads occur in certain months). Could come down to precipitation in some previous time periods.

Could also be that because the majority of stations are in the bay proper, they’re diluted and that’s driving the lack-of-relationship. Really need to break this all out by “in-bay” vs. “have some land near the station”.

4.1.1 Lagged hydro load

Code
ggplot(fib_hydro, aes(x = lag1_hydro,
                      y = val)) +
    geom_jitter(aes(col = factor(mo)),
               size = 2,
               alpha = 0.5) +
    geom_smooth() +
    geom_rug(col = "gray40") +
    scale_y_log10() +
    scale_x_log10() +
    facet_wrap(~bay_segment) +
    labs(title = "Entero vs. lagged hydrologic load; log-log scale",
         x = "Hydrologic Load in previous month",
         y = "Enterococcus",
         col = "Month")

Nothing new.

First a little more exploration of entero concentrations by time of year.

4.2 Entero by monthly precip

Code
ggplot(fib_hydro, aes(x = inches,
                      y = val)) +
    geom_jitter(aes(col = factor(mo)),
               size = 2,
               alpha = 0.3) +
    geom_smooth() +
    geom_rug(col = "gray40") +
    scale_y_log10() +
    scale_x_log10() +
    facet_wrap(~bay_segment) +
    labs(title = "Entero vs. Monthly Precip",
         x = "Precipitation (in)",
         y = "Enterococcus",
         col = "Month")

We see it a bit more with precip rather than hydrologic load, at least in the higher values.

4.2.1 Previous month’s precip

Code
ggplot(fib_hydro, aes(x = lag1_prcp,
                      y = val)) +
    geom_jitter(aes(col = factor(mo)),
               size = 2,
               alpha = 0.3) +
    geom_smooth() +
    geom_rug(col = "gray40") +
    scale_y_log10() +
    scale_x_log10() +
    facet_wrap(~bay_segment) +
    labs(title = "Entero vs. Lagged Precip",
         x = "Precipitation (in) in previous month",
         y = "Enterococcus",
         col = "Month")

Yeah, not that different.

4.3 Entero by month

Code
ggplot(fib_hydro,
       aes(x = factor(mo),
           y = val)) +
    geom_jitter(aes(col = factor(mo)),
                size = 0.7,
                alpha = 0.4) +
    geom_boxplot(alpha = 0) +
    scale_y_log10() +
    facet_wrap(~bay_segment) +
    labs(title = "Entero counts by month",
         x = "Month",
         y = "Enterococcus") +
    theme(legend.position = "none")

4.4 Hydro load by month

Code
ggplot(hydro_sub,
       aes(x = factor(month),
           y = hydro_load)) +
    geom_jitter(aes(col = factor(month)),
                size = 0.7,
                alpha = 0.6) +
    geom_boxplot(alpha = 0) +
    scale_y_log10() +
    facet_wrap(~bay_segment) +
    labs(title = "Hydrologic Load by month",
         x = "Month",
         y = "Hydrologic Load") +
    theme(legend.position = "none")

4.5 Precip by month

Code
ggplot(combnd,
       aes(x = factor(month),
           y = inches)) +
    geom_jitter(aes(col = factor(month)),
                size = 0.7,
                alpha = 0.6) +
    geom_boxplot(alpha = 0) +
    scale_y_log10() +
    facet_wrap(~bay_segment) +
    labs(title = "Precip by month",
         x = "Month",
         y = "Monthly Precipitation (inches)") +
    theme(legend.position = "none")

5 Inside bay vs. not

Code
tbseg2 <- tbseg |> 
    st_transform(crs = st_crs(swfwmdpixels))

fib_nonbay <- st_filter(fib_hydro, st_union(tbseg2), .predicate = st_disjoint)

Note, this isn’t good spatial coverage - may want to think about including freshwater sites too (eek).

Code
mapview(fib_nonbay)

5.1 Entero by month

Code
ggplot(fib_nonbay,
       aes(x = factor(mo),
           y = val)) +
    geom_jitter(aes(col = factor(mo)),
                size = 0.7,
                alpha = 0.4) +
    geom_boxplot(alpha = 0) +
    scale_y_log10() +
    facet_wrap(~bay_segment) +
    labs(title = "Entero counts by month",
         subtitle = "stations that are not in the bay proper",
         x = "Month",
         y = "Enterococcus") +
    theme(legend.position = "none")

Still really not any cycling…. surprising. Maybe a little in Hillsborough Bay.

5.2 Entero by monthly hydro load

Code
ggplot(fib_nonbay, aes(x = hydro_load,
                      y = val)) +
    geom_jitter(aes(col = factor(mo)),
               size = 2,
               alpha = 0.5) +
    geom_smooth() +
    geom_rug(col = "gray40") +
    scale_y_log10() +
    # scale_x_log10() +
    facet_wrap(~bay_segment) +
    labs(title = "Entero vs. hydrologic load",
         subtitle = "stations that are not in the bay proper",
         x = "Hydrologic Load",
         y = "Enterococcus",
         col = "Month")

Code
ggplot(fib_nonbay, aes(x = hydro_load,
                      y = val)) +
    geom_jitter(aes(col = bay_segment),
               size = 2,
               alpha = 0.5) +
    geom_smooth() +
    geom_rug(col = "gray40") +
    scale_y_log10() +
    scale_x_log10() +
    # facet_wrap(~bay_segment) +
    labs(title = "Entero vs. hydrologic load; log-log scale",
         subtitle = "stations that are not in the bay proper",
         x = "Hydrologic Load",
         y = "Enterococcus",
         col = "Month")

5.3 Entero and precip

Code
ggplot(fib_nonbay, aes(x = inches,
                      y = val)) +
    geom_jitter(aes(col = bay_segment),
               size = 2,
               alpha = 0.5) +
    geom_smooth() +
    geom_rug(col = "gray40") +
    scale_y_log10() +
    # scale_x_log10() +
    # facet_wrap(~bay_segment) +
    labs(title = "Entero vs. precip",
         subtitle = "stations that are not in the bay proper",
         x = "Precipitation (in)",
         y = "Enterococcus",
         col = "Month")

Code
ggplot(fib_nonbay, aes(x = inches,
                      y = val)) +
    geom_jitter(aes(col = bay_segment),
               size = 2,
               alpha = 0.5) +
    geom_smooth() +
    geom_rug(col = "gray40") +
    scale_y_log10() +
    # scale_x_log10() +
    facet_wrap(~bay_segment) +
    labs(title = "Entero vs. precip",
         subtitle = "stations that are not in the bay proper",
         x = "Precipitation (in)",
         y = "Enterococcus",
         col = "Month")

Code
ggplot(fib_nonbay, aes(x = inches,
                      y = val)) +
    geom_jitter(aes(col = bay_segment),
               size = 2,
               alpha = 0.5) +
    geom_smooth() +
    geom_rug(col = "gray40") +
    scale_y_log10() +
    scale_x_log10() +
    facet_wrap(~bay_segment) +
    labs(title = "Entero vs. precip; log-log scale",
         subtitle = "stations that are not in the bay proper",
         x = "Precipitation (in)",
         y = "Enterococcus",
         col = "Month")

Changing the x-axis between log and ‘regular’ really changes how it looks.

5.3.1 Lagged precip

Code
ggplot(fib_nonbay, aes(x = lag1_prcp,
                      y = val)) +
    geom_jitter(aes(col = bay_segment),
               size = 2,
               alpha = 0.5) +
    geom_smooth() +
    geom_rug(col = "gray40") +
    scale_y_log10() +
    # scale_x_log10() +
    # facet_wrap(~bay_segment) +
    labs(title = "Entero vs. lagged precip",
         subtitle = "stations that are not in the bay proper",
         x = "Previous Month's Precipitation (in)",
         y = "Enterococcus",
         col = "Month")

Code
ggplot(fib_nonbay, aes(x = lag1_prcp,
                      y = val)) +
    geom_jitter(aes(col = as.factor(mo)),
               size = 2,
               alpha = 0.5) +
    geom_smooth() +
    geom_rug(col = "gray40") +
    scale_y_log10() +
    # scale_x_log10() +
    facet_wrap(~bay_segment) +
    labs(title = "Entero vs. lagged precip",
         subtitle = "stations that are not in the bay proper",
         x = "Previous Month's Precipitation (in)",
         y = "Enterococcus",
         col = "Month")

Code
ggplot(fib_nonbay, aes(x = lag1_prcp,
                      y = val)) +
    geom_jitter(aes(col = factor(mo)),
               size = 2,
               alpha = 0.5) +
    geom_smooth() +
    geom_rug(col = "gray40") +
    scale_y_log10() +
    scale_x_log10() +
    facet_wrap(~bay_segment) +
    labs(title = "Entero vs. lagged precip; log-log scale",
         subtitle = "stations that are not in the bay proper",
         x = "Precipitation (in)",
         y = "Enterococcus",
         col = "Month")

6 A few individual sites

OrgID 21FLHILL_WQX Stations 22, 136, 036, 054

Code
fib_tiny <- fib_hydro |> 
    dplyr::filter(OrgID == "21FLHILL_WQX",
           Station %in% c("22", "136", "036", "054"))
mapview(fib_tiny)

6.1 Time Series

Code
fib_long <- fib_tiny |> 
    mutate(Date = lubridate::ymd(paste(yr, mo, "01"))) |> 
    rename(Entero = val) |> 
    select(OrgID, Station, Date, Entero, bay_segment, inches, hydro_load, lag1_prcp, lag1_hydro, geometry) |> 
    pivot_longer(c(Entero, inches, hydro_load, lag1_prcp, lag1_hydro),
                 names_to = "variable",
                 values_to = "value")
Code
ggplot(fib_long, aes(x = Date, y = value, col = Station)) +
    geom_point() +
    geom_line() +
    scale_y_log10() +
    facet_wrap(~variable, scales = "free_y",
               ncol = 1)

Well well well, we have at least one disappearing station, and it’s one where enterococcus was generally high.

Code
ggplot(fib_tiny, aes(x = inches, y = val, col = Station, group = Station)) +
    geom_point(size = 2, alpha = 0.7) +
    scale_y_log10() +
    geom_smooth(method = "lm", se = FALSE) +
    labs(x = "Precip",
         y = "Entero")

And really it looks like values within a station are sort of within their own range, different than other stations, as opposed to really correlating with precip (at least these few stations, on this graph). Maybe a bit of a relationship if we force it to linear rather than loess.

Code
ggplot(fib_tiny, aes(x = lag1_prcp, y = val, col = Station, group = Station)) +
    geom_point(size = 2, alpha = 0.7) +
    scale_y_log10() +
    geom_smooth(method = "lm", se = FALSE) +
    labs(x = "Lagged Precip",
         y = "Entero")

A little different, but not much.

Code
ggplot(fib_tiny, aes(x = hydro_load, y = val, col = Station, group = Station)) +
    geom_point(size = 2, alpha = 0.7) +
    scale_y_log10() +
    geom_smooth(method = "lm", se = FALSE) +
    labs(x = "Hydro load",
         y = "Entero")

7 experimentation

Code
fib_tiny <- fib_hydro[c(1, 100, 400, 600, 2000, 5000, 8000, 10000), ]
mapview(fib_tiny)

nearBorder <- rowSums(st_is_within_distance(fib_tiny, tbseg2, dist = 10, sparse = FALSE))

fib_within_x <- fib_tiny[nearBorder > 0, ]
mapview(fib_within100) + mapview(tbseg2)

8 Daily-level precipitation

Incorporate day-of-sampling, day + day before (24 hrs), day + 2 days before (48 hrs), and day + week before (7-day)

Working with fib_daily_prcp data frame.

Code
fib_daily_nonbay <- st_filter(fib_daily_prcp, st_union(tbseg2), .predicate = st_disjoint)

fib_daily_long <- fib_daily_prcp |> 
    pivot_longer(inches:daily_7d,
                 names_to = "time_period",
                 values_to = "total") |> 
    mutate(time_period = case_match(time_period,
                                    "inches" ~ "day of sample",
                                    "daily_24hrs" ~ "sample + previous day",
                                    "daily_48hrs" ~ "sample + 2 days",
                                    "daily_7d" ~ "previous week"))
fib_daily_long_nonbay <- fib_daily_long |> 
    filter(Station %in% fib_daily_nonbay$Station)
Code
ggplot(fib_daily_long, aes(x = total, y = val)) +
    geom_point(aes(col = time_period),
               alpha = 0.4) +
    facet_wrap(~BAY_SEG) +
    scale_x_log10() +
    scale_y_log10() +
    scale_color_okabeito() +
    labs(title = "All stations",
         subtitle = "note log-log scale",
         x = "Precip (inches)",
         y = "Enterococcus",
         col = "time period of precip total")

Code
ggplot(fib_daily_long, aes(x = total, y = val)) +
    geom_point(aes(col = BAY_SEG),
               alpha = 0.4) +
    facet_wrap(~time_period) +
    scale_x_log10() +
    scale_y_log10() +
    scale_color_okabeito() +
    labs(title = "All stations",
         subtitle = "note log-log scale",
         x = "Precip (inches)",
         y = "Enterococcus",
         col = "Bay Segment")

Code
ggplot(fib_daily_long_nonbay, aes(x = total, y = val)) +
    geom_point(aes(col = time_period),
               alpha = 0.4) +
    facet_wrap(~BAY_SEG) +
    scale_x_log10() +
    scale_y_log10() +
    scale_color_okabeito() +
    labs(title = "Non-bay stations",
         subtitle = "note log-log scale",
         x = "Precip (inches)",
         y = "Enterococcus",
         col = "time period of precip total")

Code
ggplot(fib_daily_long_nonbay, aes(x = total, y = val)) +
    geom_point(aes(col = BAY_SEG),
               alpha = 0.4) +
    facet_wrap(~time_period) +
    scale_x_log10() +
    scale_y_log10() +
    scale_color_okabeito() +
    labs(title = "Non-bay stations",
         subtitle = "note log-log scale",
         x = "Precip (inches)",
         y = "Enterococcus",
         col = "Bay Segment")

8.0.1 Calculate correlations

Spearman correlations

8.0.1.1 All stations, all bay segments combined

Code
all_stns <- fib_daily_prcp |> 
    st_drop_geometry() |> 
    select(val, inches:daily_7d) |> 
    set_names(c("Entero", "sample_day_precip",
                "sample_and_24hrs",
                "sample_and_48hrs",
                "previous_week"))
Code
corr_all <- cor(all_stns,
    use = "pairwise.complete.obs",
    method = "spearman")

knitr::kable(corr_all,
             caption = "Spearman correlation coefficients",
             digits = 3)
Spearman correlation coefficients
Entero sample_day_precip sample_and_24hrs sample_and_48hrs previous_week
Entero 1.000 0.099 0.145 0.161 0.132
sample_day_precip 0.099 1.000 0.871 0.709 0.508
sample_and_24hrs 0.145 0.871 1.000 0.845 0.588
sample_and_48hrs 0.161 0.709 0.845 1.000 0.706
previous_week 0.132 0.508 0.588 0.706 1.000
Code
knitr::kable(cor.mtest(corr_all)$p,
             caption = "p-values of Spearman correlations",
             digits = 3)
p-values of Spearman correlations
Entero sample_day_precip sample_and_24hrs sample_and_48hrs previous_week
Entero 0.000 0.057 0.041 0.035 0.109
sample_day_precip 0.057 0.000 0.013 0.120 0.521
sample_and_24hrs 0.041 0.013 0.000 0.038 0.400
sample_and_48hrs 0.035 0.120 0.038 0.000 0.180
previous_week 0.109 0.521 0.400 0.180 0.000

8.0.1.2 Non-bay stations, all bay segments combined

Code
nonbay_stns <- fib_daily_nonbay |> 
    st_drop_geometry() |> 
    select(val, inches:daily_7d) |> 
    set_names(c("Entero", "sample_day_precip",
                "sample_and_24hrs",
                "sample_and_48hrs",
                "previous_week"))
Code
corr_all <- cor(nonbay_stns,
    use = "pairwise.complete.obs",
    method = "spearman")

knitr::kable(corr_all,
             caption = "Spearman correlation coefficients",
             digits = 3)
Spearman correlation coefficients
Entero sample_day_precip sample_and_24hrs sample_and_48hrs previous_week
Entero 1.000 0.181 0.264 0.284 0.271
sample_day_precip 0.181 1.000 0.843 0.729 0.536
sample_and_24hrs 0.264 0.843 1.000 0.906 0.651
sample_and_48hrs 0.284 0.729 0.906 1.000 0.763
previous_week 0.271 0.536 0.651 0.763 1.000
Code
knitr::kable(cor.mtest(corr_all)$p,
             caption = "p-values of Spearman correlations",
             digits = 3)
p-values of Spearman correlations
Entero sample_day_precip sample_and_24hrs sample_and_48hrs previous_week
Entero 0.000 0.045 0.042 0.050 0.175
sample_day_precip 0.045 0.000 0.035 0.147 0.606
sample_and_24hrs 0.042 0.035 0.000 0.020 0.395
sample_and_48hrs 0.050 0.147 0.020 0.000 0.178
previous_week 0.175 0.606 0.395 0.178 0.000

8.0.1.3 Non-bay stations, by bay segment

Code
options(scipen = 999)

my_cor <- function(df, x){
    cor(df[["val"]], df[[x]],
        method = "spearman",
        use = "pairwise.complete.obs")
}

my_cor.p <- function(df, x){
    cor.test(df[["val"]], df[[x]],
        method = "spearman",
        use = "pairwise.complete.obs")$p.value
}

nonbay_stns2 <- fib_daily_nonbay |> 
    st_drop_geometry() |> 
    filter(BAY_SEG != "Manatee River") |> # no values, messign things up
    select(BAY_SEG, val, inches:daily_7d) |> 
    summarize(.by = BAY_SEG,
              cor.sampleday = my_cor(.data, "inches"),  # this could all be more elegant
              pval.sampleday = my_cor.p(.data, "inches"),
              cor.24hrs = my_cor(.data, "daily_24hrs"),
              pval.24hrs = my_cor.p(.data, "daily_24hrs"),
              cor.48hrs = my_cor(.data, "daily_48hrs"),
              pval.48hrs = my_cor.p(.data, "daily_48hrs"),
              cor.7d = my_cor(.data, "daily_7d"),
              pval.7d = my_cor.p(.data, "daily_7d"))

knitr::kable(nonbay_stns2,
             caption = "Spearman Correlations and p-values for Entero~rainfall by bay segment",
             digits = 3)    
Spearman Correlations and p-values for Entero~rainfall by bay segment
BAY_SEG cor.sampleday pval.sampleday cor.24hrs pval.24hrs cor.48hrs pval.48hrs cor.7d pval.7d
Boca Ciega Bay 0.193 0.043 0.415 0.000 0.511 0.000 0.527 0.000
Hillsborough Bay 0.271 0.000 0.305 0.000 0.337 0.000 0.325 0.000
Lower Tampa Bay 0.040 0.651 0.031 0.728 0.078 0.371 0.088 0.314
Middle Tampa Bay 0.233 0.007 0.315 0.000 0.280 0.001 0.289 0.001
Old Tampa Bay 0.072 0.580 0.165 0.200 0.163 0.206 0.086 0.505